//do /Users/shoude/Dropbox/eegap/EEgap_data_code_heter_SM/Code_JPEmicro/MakeShare_Inc_Heter.do

// ---------------------------------------------------------------------------
// Session setup
// ---------------------------------------------------------------------------

// Start from an empty session.
clear all

// Turn pausing on so that the -pause- checkpoints later in this file stop execution.
pause on

// Directory roots used below. pathname and censuspath hold the same folder.
global pathname "/Users/shoude/Dropbox/eegap/EEgap_data_code_heter_SM/Data"
global censuspath "/Users/shoude/Dropbox/eegap/EEgap_data_code_heter_SM/Data"
global resultspath "/Users/shoude/Dropbox/eegap/EEgap_data_code_heter_SM/Data_JPEmicro/Matlab_estimation"

// Switch for the rebuild section guarded by "if `process' == 1":
//   1 = rebuild the pooled file from the yearly files and save it,
//   0 = skip the rebuild and load the previously saved pooled file.
local process 0

if `process' == 1 {

// Pool the five yearly files (2008-2012) into a single dataset.
// -use- accepts -clear-, not -replace-; with -replace- the command aborts with
// "option replace not allowed", so this branch could never run.
use $pathname/lcidemo_all_046_jan2008_dec2008_struct_nocensor, clear
forvalues yr = 2009/2012 {
    append using $pathname/lcidemo_all_046_jan`yr'_dec`yr'_struct_nocensor
}

// Convert the within-year counters into running counters over the pooled sample.
// For every year after 2008 the shift per elapsed year is 3 for tri, 12 for month
// and 52 for the week counter; tri and month are overwritten in place, while the
// original week is kept and the running version is stored in week_num.
// NOTE(review): the 52-per-year shift assumes week never exceeds 52 within a
// year; otherwise week_num values of adjacent years would collide -- confirm.
gen week_num=week
forvalues yr = 2009/2012 {
    local elapsed = `yr' - 2008
    replace tri = tri + 3*`elapsed' if year==`yr'
    replace month = month + 12*`elapsed' if year==`yr'
    replace week_num = week_num + 52*`elapsed' if year==`yr'
}
    
// Construction of a robust choice set.
//
// Conservative rule for 2008: max_week is the largest week_num at which a pid
// appears anywhere in the data in memory. Rows with year 2008 and trimester 2
// are removed for every pid whose max_week is below 31.
// NOTE(review): this filter reads -trimester-, while the counter shifted per
// year earlier in this file is -tri-; confirm both exist and which is intended.
bysort pid (week_num): egen max_week = max(week_num)
drop if year==2008 & trimester==2 & max_week<31
	
/*	
    sort pid zipcode
	by pid zipcode: egen max_month=max(month)
	by pid zipcode: egen max_week_num=max(week_num)
	by pid: egen min_max_month=min(max_month)
*/
		/*
	. tab max_month if delisted==1 & delisted2010!=1

  max_month |      Freq.     Percent        Cum.
------------+-----------------------------------
          1 |      8,577        8.26        8.26
          2 |      9,864        9.49       17.75
          3 |     18,050       17.37       35.12
          4 |      9,349        9.00       44.12
          5 |      8,842        8.51       52.64
          6 |      3,019        2.91       55.54
          7 |      2,399        2.31       57.85
          8 |      2,478        2.39       60.24
          9 |      1,803        1.74       61.97
         10 |         87        0.08       62.05
         11 |      2,031        1.95       64.01
         12 |      1,880        1.81       65.82
         13 |      1,812        1.74       67.56
         14 |      1,257        1.21       68.77
         15 |      1,623        1.56       70.34
         16 |      1,340        1.29       71.63
         17 |      1,869        1.80       73.42
         18 |      1,858        1.79       75.21
         19 |      2,591        2.49       77.71
         20 |      2,900        2.79       80.50
         21 |      2,598        2.50       83.00
         22 |      2,770        2.67       85.67
         23 |      4,228        4.07       89.74
         24 |      4,250        4.09       93.83
         25 |      1,829        1.76       95.59
         26 |        631        0.61       96.19
         27 |        589        0.57       96.76
         28 |        896        0.86       97.62
         29 |        894        0.86       98.48
         30 |      1,059        1.02       99.50
         31 |        169        0.16       99.67
         32 |         91        0.09       99.75
         33 |         40        0.04       99.79
         34 |         14        0.01       99.81
         35 |         26        0.03       99.83
         36 |          6        0.01       99.84
         37 |         19        0.02       99.85
         38 |         11        0.01       99.87
         39 |         12        0.01       99.88
         40 |         11        0.01       99.89
         41 |         11        0.01       99.90
         42 |         20        0.02       99.92
         43 |         16        0.02       99.93
         44 |         16        0.02       99.95
         45 |         21        0.02       99.97
         46 |          6        0.01       99.97
         47 |          8        0.01       99.98
         48 |         19        0.02      100.00
------------+-----------------------------------
      Total |    103,889      100.00
	
	tab max_month if delisted2010==1
			 max_month |      Freq.     Percent        Cum.
		------------+-----------------------------------
		         25 |        652        7.33        7.33
		         26 |        852        9.58       16.91
		         27 |      1,399       15.73       32.65
		         28 |      3,044       34.23       66.88
		         29 |      2,402       27.01       93.89
		         30 |        313        3.52       97.41
		         31 |        113        1.27       98.68
		         32 |         46        0.52       99.20
		         33 |         28        0.31       99.52
		         34 |         23        0.26       99.78
		         35 |          5        0.06       99.83
		         36 |          5        0.06       99.89
		         38 |          5        0.06       99.94
		         39 |          5        0.06      100.00
		------------+-----------------------------------
		      Total |      8,892      100.00
	*/
	// Remove rows at or after fixed month cutoffs for flagged pids. The variant
	// keyed on each pid's own max_month is kept below, disabled.
	//drop if month>=max_month & delisted2010==1

	// NOTE(review): if delisted==1 is also set whenever delisted2010==1, the
	// month>=25 rule already covers the month>=30 rule -- confirm the two flags
	// are meant to be disjoint.
	drop if delisted2010==1 & month>=30
	drop if delisted==1 & month>=25

	// Largest month and week_num observed for each pid-zipcode pair, followed by
	// the smallest of those largest months across the zipcodes of a pid.
	bysort pid zipcode: egen max_month = max(month)
	by pid zipcode: egen max_week_num = max(week_num)
	by pid: egen min_max_month = min(max_month)
    
	/*
	//Select households that made only one purchase during the
	//whole sample period: 2008-2012
    by hd_id, sort: egen nb_purchase=count(count)
    tab nb_purchase
    drop if nb_purchase>1
    */
    
    // ---- Identifiers for the demographic groups ----

    // Purchase intensity per hd_id:
    //   nb_purchase = number of rows with a non-missing -count-,
    //   total_qty   = sum of o_qty after missing o_qty has been recoded to 1.
    bysort hd_id: egen nb_purchase = count(count)
    tab nb_purchase
    mvencode o_qty, mv(1) override
    bysort hd_id: egen total_qty = total(o_qty)
    tab total_qty

    // Household flag: 1 unless total_qty or nb_purchase exceeds 1.
    gen Dhd = !(total_qty>1 | nb_purchase>1)

    // Contractor flag: 1 when total_qty or nb_purchase exceeds 2 and neither is missing.
    gen Dcontractor = (total_qty>2 | nb_purchase>2) & total_qty!=. & nb_purchase!=.

    // 1 when any of the five demographic variables equals system missing (.).
    gen miss_demo = income==. | age==. | adult==. | children==. | education==.

    // Dhd==1, complete demographics, rent different from "R":
    // one indicator per value of income_six ...
    forvalues k = 1/6 {
        gen Dhd_income_`k' = Dhd==1 & miss_demo==0 & rent!="R" & income_six==`k'
    }
    // ... and one per value of income_tert.
    forvalues k = 1/3 {
        gen Dhd_income_tert`k' = Dhd==1 & miss_demo==0 & rent!="R" & income_tert==`k'
    }

    // Dhd==1, complete demographics, rent equal to "R": split at income 6.
    gen Dhd_renter_1 = Dhd==1 & miss_demo==0 & rent=="R" & income<6
    gen Dhd_renter_2 = Dhd==1 & miss_demo==0 & rent=="R" & income>=6

    // Dhd==1 with at least one missing demographic.
    gen Dhd_miss_demo = Dhd==1 & miss_demo==1

    // Dhd==0, complete demographics, rent different from "R", by income_tert.
    forvalues k = 1/3 {
        gen Dnonhd_tert`k' = Dhd==0 & miss_demo==0 & rent!="R" & income_tert==`k'
    }

    // Dcontractor==1, complete demographics, rent different from "R", by income_tert.
    forvalues k = 1/3 {
        gen Dcontractor_tert`k' = Dcontractor==1 & miss_demo==0 & rent!="R" & income_tert==`k'
    }

    // Single group code, 0 when no indicator applies. Assignments run in
    // increasing code order, so when several indicators are 1 for a row the
    // highest code wins (e.g. a contractor code 13-15 replaces a code 9-11).
    gen group_id = 0
    forvalues k = 1/6 {
        replace group_id = `k' if Dhd_income_`k'==1
    }
    replace group_id = 7 if Dhd_renter_1==1
    replace group_id = 8 if Dhd_renter_2==1
    forvalues k = 1/3 {
        replace group_id = 8 + `k' if Dnonhd_tert`k'==1
    }
    replace group_id = 12 if Dhd_miss_demo==1
    forvalues k = 1/3 {
        replace group_id = 12 + `k' if Dcontractor_tert`k'==1
    }
    // Alternative groupings, currently disabled:
    //replace group_id=16 if Dhd==1 & miss_demo==0 & rent!="R"
    //replace group_id=17 if Dhd==1 & rent!="R"
    //replace group_id=18 if Dhd==0 & miss_demo==0 
    //replace group_id=19 if Dhd==0 
    //replace group_id=20 if miss_demo==0 

// Store the pooled file with group codes; it is reloaded right after the rebuild section.
save $pathname/lcidemo_046_2008_2012_struct_v11_11022017_robustb_nocensor_11022017_gr_1_15, replace
}

// Load the pooled file that carries group_id.
use $pathname/lcidemo_046_2008_2012_struct_v11_11022017_robustb_nocensor_11022017_gr_1_15, clear

// Keep group codes 1 through 6 only (the codes assigned from Dhd_income_1-6).
drop if group_id <= 0 | group_id >= 7

// Attach the zip-to-county mapping.
// NOTE(review): this is the legacy "merge varlist using" form. It relies on the
// using file already being sorted by zipcode and does not check that zipcode is
// unique there; consider -merge m:1 zipcode using ...- after confirming uniqueness.
sort zipcode
merge zipcode using $censuspath/mapping_zip_county_nov99
tab _merge

// Checkpoint: inspect the merge tabulation before unmatched rows are discarded.
pause

// Retain rows found in both files.
drop if _merge != 3
drop _merge

// nb_hd = number of rows per county5 x group_id cell.
gen nb_hd = 1
collapse (sum) nb_hd, by(county5 group_id)
rename county5 county

// Share of each group within its county.
bysort county: egen tot_hd = total(nb_hd)
gen share_inc_heter = nb_hd / tot_hd

// Add the county x group combinations that have no rows and give them a share of 0.
fillin county group_id
replace share_inc_heter = 0 if missing(share_inc_heter)

// Mean and standard deviation of share_inc_heter over the rows of each group_id.
bysort group_id: egen share_inc_mean = mean(share_inc_heter)
bysort group_id: egen share_inc_sd = sd(share_inc_heter)

// Display them group by group: all six means first, then all six standard deviations.
foreach stat in mean sd {
    forvalues g = 1/6 {
        sum share_inc_`stat' if group_id==`g'
    }
}

// Checkpoint: review the summaries before the export step.
pause

// Export the shares as one row per county with one column per group_id
// (share_inc_heter1, share_inc_heter2, ...), both as a Stata dataset and as a
// comma-separated file without a header row or quotes. The long-format data
// in memory are put back afterwards.
preserve

keep county group_id share_inc_heter
reshape wide share_inc_heter, i(county) j(group_id)
save "$resultspath/share_inc_heter_county", replace
outsheet using "$resultspath/share_inc_heter_county.csv", comma noquote nonames replace

restore
